/*
 * This file contains the core of a bitslice DES implementation for x86/MMX.
 * It is part of John the Ripper password cracker,
 * Copyright (c) 2000-2001,2005,2006,2008,2011,2012,2015 by Solar Designer
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.  (This is a heavily cut-down "BSD license".)
 *
 * Gate counts per S-box: 49 44 46 33 48 46 46 41
 * Average: 44.125
 *
 * The Boolean expressions corresponding to DES S-boxes have been generated
 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
 * John the Ripper password cracker: https://www.openwall.com/john/
 * Being mathematical formulas, they are not copyrighted and are free for reuse
 * by anyone.
 *
 * The x86/MMX code for the S-boxes was generated by Solar Designer using a
 * Perl script, hand-optimized, and then had its instruction scheduling tuned
 * on a Pentium 3 using a brute-force instruction scheduling program running
 * the individual S-boxes in a context similar to that of this file.
 *
 * The effort has been sponsored by Rapid7: https://www.rapid7.com
 *
 * Note: there's some MMX code in x86.S as well (just not for bitslice DES).
 */

#include "arch.h"

#if DES_BS_ASM

/*
 * When UNDERSCORES is defined, every label exported from this file is renamed
 * to carry a leading underscore (presumably for targets whose C symbols are
 * emitted with that prefix -- confirm against arch.h).
 */
#ifdef UNDERSCORES
#define DES_bs_all			_DES_bs_all
#define DES_bs_init_asm			_DES_bs_init_asm
#define DES_bs_crypt			_DES_bs_crypt
#define DES_bs_crypt_25			_DES_bs_crypt_25
#define DES_bs_crypt_LM			_DES_bs_crypt_LM
#endif

/*
 * DO_ALIGN(log) aligns the location counter to a 2^log byte boundary.
 *
 * ALIGN_LOG selects the operand convention of .align: when it is defined the
 * assembler is given the exponent itself, otherwise it is given the byte
 * count (1 << log).
 *
 * ALIGN_FIX is a workaround for broken systems that can't provide section
 * alignments larger than 4 bytes, whereas the MMX code needs at least 8:
 * when it is defined, 4 extra bytes are emitted after the alignment to
 * correct the addresses we would otherwise end up with.
 */
#ifdef ALIGN_LOG
#ifdef ALIGN_FIX
#define DO_ALIGN(log)			.align log; .space 4
#else
#define DO_ALIGN(log)			.align log
#endif
#else
#ifdef ALIGN_FIX
#define DO_ALIGN(log)			.align 1 << log; .space 4
#else
#define DO_ALIGN(log)			.align 1 << log
#endif
#endif

/*
 * DO_SPACE(size) reserves size bytes.  Two spellings are needed because no
 * single directive is accepted everywhere.
 */
#ifndef __sun
/* Default: .space (the Mac OS X assembler doesn't recognize .zero) */
#define DO_SPACE(size)			.space size
#else
/* Sun's assembler doesn't recognize .space, so use .zero there */
#define DO_SPACE(size)			.zero size
#endif

/*
 * Element-count to byte-count conversions, written as repeated addition
 * because:
 * Sun's assembler can't multiply, but at least it can add...
 *
 * nptr(n) = 4 * n (byte size of n 32-bit pointer slots)
 * nvec(n) = 8 * n (byte size of n 64-bit MMX vectors)
 *
 * The argument is pasted textually, so an argument such as "i + 1" or
 * "0x300-48" still sums to the intended multiple.
 */
#define nptr(n)				n+n+n+n
#define nvec(n)				n+n+n+n+n+n+n+n

/*
 * Storage for the exported DES_bs_all area.  The sub-labels below give names
 * to its consecutive regions.  NOTE(review): the region order and sizes
 * presumably have to match a structure declared on the C side -- confirm
 * before changing anything here.
 */
#ifdef BSD
.data
#else
.bss
#endif

.globl DES_bs_all
DO_ALIGN(5)
DES_bs_all:
/* 0x300 pointer slots; DES_bs_finalize_keys dereferences each one */
DES_bs_all_KSp:
DO_SPACE(nptr(0x300))
/*
 * One region with two names: DES_bs_crypt and DES_bs_crypt_25 read it as
 * 0x300 vectors (KS_v, filled by DES_bs_finalize_keys), while DES_bs_crypt_LM
 * reads it as a table of pointers (KS_p).
 */
DES_bs_all_KS_p:
DES_bs_all_KS_v:
DO_SPACE(nvec(0x300))
/* 96 pointer slots, dereferenced by xor_E */
DES_bs_all_E:
DO_SPACE(nptr(96))
/* 56 vectors written by the key finalization code */
DES_bs_all_K:
DO_SPACE(nvec(56))
/* 64 vectors: the data block that the S-box macros read and update */
DES_bs_all_B:
DO_SPACE(nvec(64))
/* 16 vectors: slot 0 is pnot, slot 15 is mask01, the rest is S-box scratch */
DES_bs_all_tmp:
DO_SPACE(nvec(16))
/* 64 vectors: input to the key finalization code */
DES_bs_all_xkeys:
DO_SPACE(nvec(64))
/* 64 pointer slots; not referenced by the code in this file */
DES_bs_all_pxkeys:
DO_SPACE(nptr(64))
/* 32-bit flag: non-zero makes DES_bs_crypt{,_25} run DES_bs_finalize_keys */
DES_bs_all_keys_changed:
DO_SPACE(4)
/* 4 bytes; not referenced by the code in this file */
DES_bs_all_salt:
DO_SPACE(4)
/* 48 pointer slots; not referenced by the code in this file */
DES_bs_all_Ens:
DO_SPACE(nptr(48))

/*
 * Absolute-address operands for the DES_bs_all regions:
 *   E(i)       i-th pointer slot of DES_bs_all_E
 *   B(i)       i-th 64-bit vector of DES_bs_all_B
 *   tmp_at(i)  i-th 64-bit vector of DES_bs_all_tmp
 */
#define E(i)				DES_bs_all_E+nptr(i)
#define B(i)				DES_bs_all_B+nvec(i)
#define tmp_at(i)			DES_bs_all_tmp+nvec(i)

/*
 * All-ones constant stored by DES_bs_init_asm; the S-boxes XOR it into a
 * register to complement it.
 */
#define pnot				tmp_at(0)

/*
 * S1: bitslice evaluation of DES S-box 1 (49 gates according to the table in
 * the header of this file).
 *
 * In:   %mm0..%mm5 hold the six input vectors (a1..a6 as prepared by the
 *       xor_* macros).
 * Out:  each of the four result vectors is XORed into its memory operand
 *       (outN is read with pxor and then written back with movq).
 * Uses: pnot, scratch slots tmp_at(1)..tmp_at(9); %mm0..%mm7 are all
 *       overwritten.
 *
 * The instruction order comes from the scheduling work described in the file
 * header; keep it as is unless the S-box is re-verified afterwards.
 */
#define S1(out1, out2, out3, out4) \
	movq %mm0,tmp_at(1); \
	movq %mm5,%mm7; \
	movq %mm4,tmp_at(4); \
	movq %mm2,%mm6; \
	movq %mm1,tmp_at(2); \
	por %mm2,%mm7; \
	movq %mm3,tmp_at(3); \
	pxor %mm0,%mm6; \
	movq %mm7,tmp_at(5); \
	movq %mm6,%mm1; \
	pandn %mm0,%mm4; \
	pand %mm7,%mm1; \
	movq %mm1,%mm7; \
	por %mm5,%mm7; \
	pxor %mm3,%mm1; \
	pxor %mm4,%mm3; \
	movq %mm1,tmp_at(6); \
	movq %mm3,%mm1; \
	pandn tmp_at(6),%mm3; \
	movq %mm3,tmp_at(7); \
	movq %mm5,%mm3; \
	por %mm0,%mm5; \
	pxor tmp_at(4),%mm3; \
	movq %mm3,tmp_at(8); \
	movq %mm5,%mm0; \
	pandn %mm3,%mm6; \
	pxor %mm2,%mm3; \
	pandn %mm2,%mm4; \
	pandn %mm1,%mm3; \
	pxor %mm3,%mm7; \
	movq tmp_at(7),%mm3; \
	pandn tmp_at(3),%mm5; \
	por %mm7,%mm0; \
	pandn %mm7,%mm3; \
	movq %mm3,tmp_at(9); \
	pand tmp_at(5),%mm7; \
	movq tmp_at(6),%mm3; \
	movq %mm0,%mm2; \
	pxor %mm1,%mm2; \
	pandn tmp_at(4),%mm3; \
	pandn %mm2,%mm4; \
	movq tmp_at(2),%mm2; \
	pxor %mm4,%mm7; \
	pxor tmp_at(8),%mm4; \
	pxor %mm3,%mm5; \
	por %mm3,%mm4; \
	pxor tmp_at(1),%mm4; \
	pxor %mm0,%mm3; \
	pandn %mm3,%mm2; \
	pxor tmp_at(5),%mm0; \
	movq tmp_at(7),%mm3; \
	por tmp_at(2),%mm3; \
	pxor pnot,%mm7; \
	pxor out1,%mm3; \
	pxor %mm7,%mm2; \
	pxor tmp_at(5),%mm4; \
	pxor out3,%mm2; \
	pxor %mm4,%mm7; \
	pxor %mm7,%mm3; \
	movq %mm3,out1; \
	por %mm6,%mm5; \
	por tmp_at(8),%mm7; \
	por %mm5,%mm0; \
	pxor out2,%mm7; \
	pxor %mm4,%mm0; \
	pxor %mm0,%mm7; \
	por tmp_at(4),%mm1; \
	movq tmp_at(2),%mm3; \
	pand tmp_at(9),%mm4; \
	pandn %mm1,%mm0; \
	pxor %mm0,%mm4; \
	por tmp_at(9),%mm3; \
	por tmp_at(2),%mm4; \
	movq %mm2,out3; \
	pxor %mm3,%mm7; \
	pxor %mm5,%mm4; \
	pxor out4,%mm4; \
	movq %mm7,out2; \
	movq %mm4,out4

/*
 * S2: bitslice evaluation of DES S-box 2 (44 gates according to the table in
 * the header of this file).
 *
 * In:   %mm0..%mm5 hold the six input vectors (a1..a6 as prepared by the
 *       xor_* macros).
 * Out:  each of the four result vectors is XORed into its memory operand
 *       (outN is read with pxor and then written back with movq).
 * Uses: pnot, scratch slots tmp_at(1)..tmp_at(5), tmp_at(7) and tmp_at(8);
 *       %mm0..%mm7 are all overwritten.
 *
 * The instruction order comes from the scheduling work described in the file
 * header; keep it as is unless the S-box is re-verified afterwards.
 *
 * NOTE(review): the second "movq %mm5,%mm7" (the one right after tmp_at(7)
 * is stored) is overwritten by "movq tmp_at(8),%mm7" before %mm7 is read
 * again.  It looks like scheduling filler -- confirm before removing.
 */
#define S2(out1, out2, out3, out4) \
	movq %mm2,tmp_at(2); \
	movq %mm1,tmp_at(1); \
	movq %mm5,%mm2; \
	movq %mm4,tmp_at(4); \
	pandn %mm0,%mm2; \
	movq %mm3,tmp_at(3); \
	pandn %mm4,%mm2; \
	movq %mm0,%mm6; \
	movq %mm2,%mm7; \
	pxor pnot,%mm0; \
	por %mm1,%mm7; \
	pxor %mm4,%mm1; \
	movq %mm7,tmp_at(5); \
	pand %mm1,%mm6; \
	movq %mm5,%mm7; \
	pxor %mm4,%mm6; \
	pandn %mm1,%mm7; \
	movq %mm3,%mm4; \
	pxor %mm7,%mm2; \
	pandn %mm6,%mm7; \
	pxor %mm5,%mm1; \
	movq %mm7,tmp_at(7); \
	movq %mm5,%mm7; \
	pand tmp_at(2),%mm5; \
	pand tmp_at(5),%mm2; \
	movq %mm5,tmp_at(8); \
	pandn %mm2,%mm5; \
	pand tmp_at(2),%mm2; \
	movq tmp_at(8),%mm7; \
	pandn tmp_at(3),%mm5; \
	pandn %mm1,%mm7; \
	pxor %mm2,%mm0; \
	movq %mm7,%mm3; \
	pxor %mm0,%mm3; \
	pxor out2,%mm5; \
	pandn tmp_at(1),%mm7; \
	pxor %mm6,%mm7; \
	pxor %mm3,%mm5; \
	movq %mm7,%mm6; \
	movq %mm5,out2; \
	movq tmp_at(7),%mm5; \
	pandn tmp_at(5),%mm4; \
	pandn %mm0,%mm6; \
	pxor tmp_at(5),%mm3; \
	movq %mm1,%mm0; \
	pxor %mm4,%mm6; \
	pxor tmp_at(2),%mm0; \
	pxor %mm0,%mm6; \
	movq %mm0,%mm4; \
	pxor out1,%mm6; \
	pandn tmp_at(1),%mm0; \
	pxor tmp_at(4),%mm2; \
	pxor %mm3,%mm0; \
	movq %mm6,out1; \
	por %mm1,%mm3; \
	por tmp_at(8),%mm0; \
	pxor %mm4,%mm0; \
	movq %mm0,%mm4; \
	pandn tmp_at(2),%mm0; \
	movq tmp_at(3),%mm6; \
	pxor tmp_at(7),%mm0; \
	por %mm7,%mm0; \
	por %mm6,%mm5; \
	pxor %mm0,%mm2; \
	pandn %mm2,%mm7; \
	por %mm2,%mm6; \
	pxor out4,%mm7; \
	pxor %mm4,%mm6; \
	pxor out3,%mm6; \
	pxor %mm5,%mm7; \
	pxor %mm3,%mm7; \
	movq %mm6,out3; \
	movq %mm7,out4

/*
 * S3: bitslice evaluation of DES S-box 3 (46 gates according to the table in
 * the header of this file).
 *
 * In:   %mm0..%mm5 hold the six input vectors (a1..a6 as prepared by the
 *       xor_* macros).
 * Out:  each of the four result vectors is XORed into its memory operand
 *       (outN is read with pxor and then written back with movq).
 * Uses: pnot, scratch slots tmp_at(1)..tmp_at(7); %mm0..%mm7 are all
 *       overwritten.
 *
 * The instruction order comes from the scheduling work described in the file
 * header; keep it as is unless the S-box is re-verified afterwards.
 */
#define S3(out1, out2, out3, out4) \
	movq %mm0,tmp_at(1); \
	movq %mm1,tmp_at(2); \
	movq %mm0,%mm7; \
	pandn %mm0,%mm1; \
	movq %mm2,tmp_at(3); \
	movq %mm5,%mm0; \
	pxor %mm2,%mm0; \
	movq %mm4,tmp_at(4); \
	movq %mm5,%mm2; \
	por %mm0,%mm1; \
	pxor %mm3,%mm2; \
	movq %mm0,%mm4; \
	movq %mm5,%mm6; \
	pandn %mm2,%mm7; \
	pxor tmp_at(2),%mm4; \
	movq %mm7,tmp_at(5); \
	pxor %mm1,%mm7; \
	pandn %mm4,%mm6; \
	movq %mm7,tmp_at(6); \
	pxor %mm6,%mm1; \
	pand %mm0,%mm2; \
	movq %mm1,%mm6; \
	movq %mm3,%mm0; \
	pandn %mm7,%mm6; \
	pand %mm5,%mm7; \
	pand %mm3,%mm5; \
	por %mm3,%mm7; \
	pand tmp_at(1),%mm7; \
	movq tmp_at(4),%mm3; \
	pandn tmp_at(6),%mm3; \
	pxor %mm4,%mm7; \
	pxor tmp_at(1),%mm0; \
	movq %mm7,tmp_at(7); \
	pxor %mm3,%mm7; \
	movq tmp_at(2),%mm3; \
	pxor out4,%mm7; \
	pxor %mm0,%mm1; \
	movq %mm7,out4; \
	movq tmp_at(3),%mm7; \
	por tmp_at(3),%mm1; \
	pandn %mm1,%mm2; \
	por tmp_at(5),%mm0; \
	movq %mm0,%mm1; \
	pandn %mm5,%mm3; \
	pandn tmp_at(7),%mm1; \
	por %mm4,%mm5; \
	pxor %mm3,%mm1; \
	por tmp_at(2),%mm7; \
	movq tmp_at(3),%mm3; \
	pandn %mm1,%mm3; \
	pxor %mm4,%mm0; \
	pandn %mm5,%mm3; \
	movq tmp_at(4),%mm5; \
	pxor tmp_at(1),%mm3; \
	pand %mm2,%mm5; \
	pxor pnot,%mm0; \
	pxor %mm5,%mm3; \
	movq %mm7,%mm5; \
	pxor out2,%mm3; \
	pandn tmp_at(4),%mm6; \
	pandn tmp_at(6),%mm7; \
	pxor %mm0,%mm6; \
	movq %mm3,out2; \
	pxor tmp_at(1),%mm2; \
	por tmp_at(4),%mm1; \
	por %mm2,%mm0; \
	pxor tmp_at(6),%mm5; \
	pxor %mm1,%mm0; \
	pxor out1,%mm6; \
	pxor out3,%mm5; \
	pxor tmp_at(7),%mm0; \
	pxor %mm7,%mm6; \
	pxor %mm5,%mm0; \
	movq %mm6,out1; \
	movq %mm0,out3

/*
 * S4: bitslice evaluation of DES S-box 4 (33 gates according to the table in
 * the header of this file).
 *
 * In:   %mm0..%mm5 hold the six input vectors (a1..a6 as prepared by the
 *       xor_* macros).
 * Out:  each of the four result vectors is XORed into its memory operand
 *       (outN is read with pxor and then written back with movq).
 * Uses: pnot, scratch slots tmp_at(2) and tmp_at(3); %mm0..%mm7 are all
 *       overwritten.
 *
 * The instruction order comes from the scheduling work described in the file
 * header; keep it as is unless the S-box is re-verified afterwards.
 *
 * NOTE(review): the first "movq %mm7,%mm6" is overwritten by the second one
 * before %mm6 is read.  It looks like scheduling filler -- confirm before
 * removing.
 */
#define S4(out1, out2, out3, out4) \
	movq %mm1,%mm7; \
	pxor %mm2,%mm0; \
	por %mm3,%mm1; \
	pxor %mm4,%mm2; \
	movq %mm5,tmp_at(2); \
	pxor %mm4,%mm1; \
	movq %mm7,%mm6; \
	movq %mm7,%mm5; \
	pandn %mm2,%mm7; \
	pandn %mm2,%mm1; \
	por %mm7,%mm4; \
	pxor %mm3,%mm7; \
	movq %mm7,%mm6; \
	por %mm0,%mm7; \
	pxor %mm5,%mm3; \
	movq %mm1,tmp_at(3); \
	pandn %mm7,%mm1; \
	movq %mm1,%mm7; \
	pxor %mm5,%mm1; \
	pand %mm1,%mm6; \
	movq %mm6,%mm5; \
	pxor %mm1,%mm0; \
	pandn %mm2,%mm6; \
	pandn %mm0,%mm6; \
	pxor %mm0,%mm4; \
	movq %mm3,%mm0; \
	pandn %mm4,%mm3; \
	movq tmp_at(2),%mm2; \
	pxor %mm7,%mm3; \
	pxor tmp_at(3),%mm6; \
	movq %mm6,%mm7; \
	pandn %mm2,%mm6; \
	pxor out1,%mm6; \
	pandn %mm7,%mm2; \
	pxor out2,%mm2; \
	pxor %mm3,%mm6; \
	pxor pnot,%mm3; \
	pxor %mm3,%mm2; \
	pxor %mm7,%mm3; \
	movq %mm6,out1; \
	pandn %mm3,%mm0; \
	por %mm5,%mm0; \
	movq %mm2,out2; \
	movq tmp_at(2),%mm3; \
	por %mm1,%mm3; \
	pand tmp_at(2),%mm1; \
	pxor %mm4,%mm0; \
	pxor %mm0,%mm3; \
	pxor out3,%mm3; \
	pxor %mm1,%mm0; \
	movq %mm3,out3; \
	pxor out4,%mm0; \
	movq %mm0,out4

/*
 * S5: bitslice evaluation of DES S-box 5 (48 gates according to the table in
 * the header of this file).
 *
 * In:   %mm0..%mm5 hold the six input vectors (a1..a6 as prepared by the
 *       xor_* macros).
 * Out:  each of the four result vectors is XORed into its memory operand
 *       (outN is read with pxor and then written back with movq).
 * Uses: pnot, scratch slots tmp_at(1)..tmp_at(8); %mm0..%mm7 are all
 *       overwritten.
 *
 * The instruction order comes from the scheduling work described in the file
 * header; keep it as is unless the S-box is re-verified afterwards.
 *
 * NOTE(review): "movq %mm2,%mm7" is overwritten by "movq %mm3,%mm7" before
 * %mm7 is read.  It looks like scheduling filler -- confirm before removing.
 */
#define S5(out1, out2, out3, out4) \
	movq %mm2,tmp_at(3); \
	movq %mm0,tmp_at(1); \
	por %mm0,%mm2; \
	movq %mm5,%mm6; \
	movq %mm2,tmp_at(4); \
	pandn %mm2,%mm5; \
	movq %mm2,%mm7; \
	movq %mm5,%mm2; \
	pxor %mm0,%mm5; \
	movq %mm3,%mm7; \
	movq %mm5,tmp_at(5); \
	pxor tmp_at(3),%mm5; \
	movq %mm1,tmp_at(2); \
	por %mm5,%mm0; \
	por %mm3,%mm5; \
	pandn %mm2,%mm3; \
	pxor tmp_at(3),%mm3; \
	movq %mm3,tmp_at(6); \
	movq %mm0,%mm1; \
	pand %mm4,%mm3; \
	pxor %mm0,%mm3; \
	pand %mm7,%mm0; \
	pxor %mm7,%mm3; \
	movq %mm3,tmp_at(3); \
	pxor %mm3,%mm6; \
	movq %mm6,%mm2; \
	por tmp_at(5),%mm6; \
	movq %mm6,%mm3; \
	pand %mm4,%mm6; \
	movq %mm6,tmp_at(7); \
	pxor tmp_at(5),%mm6; \
	pxor %mm6,%mm0; \
	movq tmp_at(1),%mm6; \
	movq %mm0,tmp_at(8); \
	pandn %mm3,%mm6; \
	movq tmp_at(2),%mm0; \
	movq %mm6,%mm3; \
	pxor tmp_at(6),%mm6; \
	pxor %mm5,%mm4; \
	pandn %mm4,%mm6; \
	pxor pnot,%mm6; \
	pandn %mm6,%mm0; \
	pxor tmp_at(3),%mm0; \
	movq tmp_at(7),%mm6; \
	pandn tmp_at(6),%mm6; \
	pxor out3,%mm0; \
	pxor %mm4,%mm3; \
	movq %mm0,out3; \
	por tmp_at(8),%mm3; \
	movq tmp_at(6),%mm0; \
	pandn %mm3,%mm6; \
	pand tmp_at(6),%mm1; \
	pand %mm6,%mm2; \
	movq %mm6,%mm3; \
	pandn %mm5,%mm6; \
	pxor %mm4,%mm2; \
	por %mm2,%mm1; \
	pxor tmp_at(4),%mm3; \
	pxor tmp_at(7),%mm1; \
	pand %mm2,%mm7; \
	pand tmp_at(2),%mm1; \
	pxor tmp_at(1),%mm7; \
	pxor tmp_at(8),%mm1; \
	pxor %mm7,%mm3; \
	por tmp_at(2),%mm6; \
	pxor out4,%mm1; \
	movq %mm1,out4; \
	pxor %mm5,%mm0; \
	pxor tmp_at(5),%mm2; \
	pxor %mm3,%mm6; \
	pandn %mm0,%mm3; \
	pand tmp_at(2),%mm5; \
	pxor %mm2,%mm3; \
	pxor out2,%mm5; \
	pxor %mm5,%mm3; \
	pxor out1,%mm6; \
	movq %mm3,out2; \
	movq %mm6,out1

/*
 * S6: bitslice evaluation of DES S-box 6 (46 gates according to the table in
 * the header of this file).
 *
 * In:   %mm0..%mm5 hold the six input vectors (a1..a6 as prepared by the
 *       xor_* macros).
 * Out:  each of the four result vectors is XORed into its memory operand
 *       (outN is read with pxor and then written back with movq).
 * Uses: pnot, scratch slots tmp_at(1)..tmp_at(10) and tmp_at(12);
 *       %mm0..%mm7 are all overwritten.
 *
 * The instruction order comes from the scheduling work described in the file
 * header; keep it as is unless the S-box is re-verified afterwards.
 */
#define S6(out1, out2, out3, out4) \
	movq %mm4,tmp_at(2); \
	pxor %mm1,%mm4; \
	movq %mm5,tmp_at(3); \
	por %mm1,%mm5; \
	movq %mm2,%mm7; \
	pand %mm0,%mm5; \
	pxor %mm0,%mm2; \
	movq %mm0,tmp_at(1); \
	pxor %mm5,%mm4; \
	movq %mm4,tmp_at(4); \
	pxor tmp_at(3),%mm4; \
	movq %mm4,%mm6; \
	pandn tmp_at(2),%mm4; \
	pand %mm0,%mm6; \
	movq %mm6,tmp_at(5); \
	pxor %mm1,%mm6; \
	movq %mm6,tmp_at(6); \
	por %mm2,%mm6; \
	movq %mm6,tmp_at(7); \
	pxor tmp_at(4),%mm6; \
	movq %mm6,%mm0; \
	pand %mm7,%mm6; \
	movq %mm6,tmp_at(8); \
	movq tmp_at(3),%mm6; \
	por %mm1,%mm2; \
	pandn tmp_at(8),%mm6; \
	movq %mm6,tmp_at(9); \
	movq tmp_at(6),%mm6; \
	por %mm4,%mm6; \
	movq %mm6,tmp_at(6); \
	pxor tmp_at(9),%mm6; \
	movq %mm6,tmp_at(10); \
	pand %mm3,%mm6; \
	pxor out4,%mm6; \
	pxor %mm0,%mm6; \
	por tmp_at(1),%mm0; \
	movq %mm6,out4; \
	movq tmp_at(7),%mm6; \
	pxor %mm1,%mm6; \
	movq %mm3,%mm1; \
	movq %mm6,tmp_at(7); \
	pandn tmp_at(3),%mm6; \
	pxor %mm7,%mm6; \
	movq tmp_at(8),%mm7; \
	movq %mm6,tmp_at(12); \
	pandn tmp_at(2),%mm7; \
	pand tmp_at(6),%mm0; \
	por %mm6,%mm7; \
	pxor %mm6,%mm0; \
	movq tmp_at(9),%mm6; \
	por %mm3,%mm4; \
	pandn %mm0,%mm6; \
	por %mm7,%mm5; \
	pxor %mm4,%mm6; \
	pxor tmp_at(4),%mm0; \
	pxor out3,%mm6; \
	pxor %mm2,%mm5; \
	movq %mm6,out3; \
	movq tmp_at(5),%mm6; \
	pandn tmp_at(2),%mm0; \
	pxor pnot,%mm2; \
	pxor tmp_at(7),%mm2; \
	pxor tmp_at(3),%mm6; \
	pxor out2,%mm5; \
	movq tmp_at(12),%mm4; \
	pxor %mm2,%mm0; \
	pxor tmp_at(1),%mm4; \
	pxor tmp_at(10),%mm5; \
	pand %mm6,%mm4; \
	pandn %mm0,%mm3; \
	pxor out1,%mm4; \
	pandn %mm7,%mm1; \
	pxor tmp_at(8),%mm4; \
	pxor %mm2,%mm1; \
	pxor %mm3,%mm5; \
	movq %mm5,out2; \
	pxor %mm1,%mm4; \
	movq %mm4,out1

/*
 * S7: bitslice evaluation of DES S-box 7 (46 gates according to the table in
 * the header of this file).
 *
 * In:   %mm0..%mm5 hold the six input vectors (a1..a6 as prepared by the
 *       xor_* macros).
 * Out:  each of the four result vectors is XORed into its memory operand
 *       (outN is read with pxor and then written back with movq).
 * Uses: pnot, scratch slots tmp_at(1)..tmp_at(7) and tmp_at(9);
 *       %mm0..%mm7 are all overwritten.
 *
 * The instruction order comes from the scheduling work described in the file
 * header; keep it as is unless the S-box is re-verified afterwards.
 *
 * NOTE(review): "movq %mm4,%mm0" is overwritten by "movq %mm6,%mm0" before
 * %mm0 is read.  It looks like scheduling filler -- confirm before removing.
 */
#define S7(out1, out2, out3, out4) \
	movq %mm0,tmp_at(1); \
	movq %mm4,tmp_at(3); \
	movq %mm4,%mm0; \
	pxor %mm3,%mm4; \
	movq %mm5,tmp_at(4); \
	movq %mm4,%mm7; \
	movq %mm3,tmp_at(2); \
	pxor %mm2,%mm4; \
	movq %mm4,tmp_at(5); \
	pand %mm5,%mm4; \
	movq %mm7,%mm5; \
	pxor tmp_at(4),%mm5; \
	pand %mm3,%mm7; \
	movq %mm7,tmp_at(6); \
	movq %mm7,%mm6; \
	pxor %mm1,%mm7; \
	pand tmp_at(4),%mm6; \
	pxor %mm2,%mm6; \
	movq %mm7,tmp_at(7); \
	movq tmp_at(1),%mm3; \
	movq %mm6,%mm0; \
	por %mm7,%mm6; \
	pand %mm4,%mm7; \
	pxor %mm5,%mm6; \
	pandn %mm3,%mm7; \
	pxor %mm4,%mm0; \
	pxor out4,%mm7; \
	pxor %mm5,%mm4; \
	pxor %mm6,%mm7; \
	movq %mm7,out4; \
	pandn tmp_at(2),%mm4; \
	por tmp_at(6),%mm6; \
	movq tmp_at(5),%mm7; \
	pandn tmp_at(3),%mm7; \
	pandn tmp_at(7),%mm4; \
	movq %mm7,tmp_at(9); \
	por tmp_at(7),%mm7; \
	pandn tmp_at(5),%mm5; \
	pxor %mm0,%mm7; \
	pxor tmp_at(3),%mm0; \
	pxor %mm4,%mm0; \
	movq tmp_at(1),%mm4; \
	pand %mm0,%mm2; \
	por %mm2,%mm6; \
	pxor %mm5,%mm6; \
	pandn %mm6,%mm3; \
	movq %mm6,%mm5; \
	pxor %mm7,%mm3; \
	pxor %mm6,%mm7; \
	por %mm0,%mm6; \
	pxor out1,%mm3; \
	pand tmp_at(4),%mm6; \
	pxor pnot,%mm5; \
	pand %mm6,%mm1; \
	pxor out3,%mm0; \
	pxor %mm7,%mm1; \
	movq %mm3,out1; \
	movq %mm4,%mm3; \
	pxor tmp_at(3),%mm7; \
	por %mm1,%mm2; \
	pxor %mm6,%mm2; \
	por %mm2,%mm7; \
	pand %mm7,%mm4; \
	pxor %mm6,%mm7; \
	por tmp_at(9),%mm7; \
	pxor %mm5,%mm7; \
	pxor out2,%mm1; \
	pandn %mm7,%mm3; \
	pxor %mm4,%mm0; \
	movq %mm0,out3; \
	pxor %mm3,%mm1; \
	movq %mm1,out2

/*
 * S8: bitslice evaluation of DES S-box 8 (41 gates according to the table in
 * the header of this file).
 *
 * In:   %mm0..%mm5 hold the six input vectors (a1..a6 as prepared by the
 *       xor_* macros).
 * Out:  each of the four result vectors is XORed into its memory operand
 *       (outN is read with pxor and then written back with movq).
 * Uses: pnot, scratch slots tmp_at(1)..tmp_at(6); %mm0..%mm7 are all
 *       overwritten.
 *
 * On exit %mm5 still holds the value just stored to out1 (the last
 * instruction is "movq %mm5,out1").  DES_bs_crypt_LM uses
 * xor_B_KS_p_special right after S8, and that macro does not reload a6
 * (%mm5), so do not reorder the final stores without checking that use.
 *
 * The instruction order comes from the scheduling work described in the file
 * header; keep it as is unless the S-box is re-verified afterwards.
 */
#define S8(out1, out2, out3, out4) \
	movq %mm2,%mm7; \
	movq %mm1,tmp_at(1); \
	pandn %mm2,%mm1; \
	movq %mm2,tmp_at(2); \
	pandn %mm4,%mm2; \
	movq %mm3,tmp_at(3); \
	pxor %mm3,%mm2; \
	movq %mm4,tmp_at(4); \
	movq %mm1,%mm3; \
	movq %mm5,tmp_at(5); \
	movq %mm2,%mm4; \
	movq %mm2,%mm5; \
	pandn tmp_at(1),%mm4; \
	pand %mm0,%mm2; \
	pandn tmp_at(1),%mm7; \
	pandn %mm2,%mm1; \
	pxor tmp_at(4),%mm7; \
	movq %mm4,%mm6; \
	por %mm0,%mm4; \
	movq %mm7,tmp_at(6); \
	pand %mm4,%mm7; \
	pxor pnot,%mm5; \
	por %mm7,%mm2; \
	pxor %mm7,%mm5; \
	pandn tmp_at(2),%mm4; \
	movq tmp_at(5),%mm7; \
	pxor %mm4,%mm5; \
	por %mm1,%mm7; \
	pxor %mm5,%mm3; \
	pxor %mm3,%mm7; \
	pxor %mm0,%mm3; \
	pxor out2,%mm7; \
	movq %mm7,out2; \
	pxor tmp_at(1),%mm5; \
	movq %mm3,%mm4; \
	pand tmp_at(4),%mm3; \
	pxor %mm5,%mm3; \
	por tmp_at(3),%mm5; \
	pxor %mm3,%mm6; \
	pxor tmp_at(6),%mm5; \
	pxor %mm2,%mm3; \
	pxor %mm6,%mm5; \
	por tmp_at(1),%mm3; \
	pxor %mm5,%mm0; \
	pxor %mm4,%mm3; \
	por tmp_at(3),%mm4; \
	pxor tmp_at(4),%mm3; \
	pand tmp_at(5),%mm2; \
	pandn %mm3,%mm4; \
	pand tmp_at(5),%mm0; \
	pxor %mm6,%mm0; \
	por %mm1,%mm4; \
	pxor out4,%mm0; \
	pxor %mm4,%mm5; \
	pxor out3,%mm2; \
	por tmp_at(5),%mm5; \
	pxor out1,%mm5; \
	pxor %mm3,%mm2; \
	pxor %mm6,%mm5; \
	movq %mm0,out4; \
	movq %mm2,out3; \
	movq %mm5,out1

/* Register that the callers clear (pxor zero,zero) before storing it */
#define zero				%mm5

/* Store the zero register to the two consecutive vectors B(i), B(i + 1) */
#define DES_bs_clear_block_2(i) \
	movq zero,B(i); \
	movq zero,B(i + 1)

/* Store the zero register to the eight vectors B(i)..B(i + 7), in order */
#define DES_bs_clear_block_8(i) \
	DES_bs_clear_block_2(i); \
	DES_bs_clear_block_2(i + 2); \
	DES_bs_clear_block_2(i + 4); \
	DES_bs_clear_block_2(i + 6)

/* Store the zero register to all 64 vectors B(0)..B(63), in order */
#define DES_bs_clear_block \
	DES_bs_clear_block_8(0); \
	DES_bs_clear_block_8(8); \
	DES_bs_clear_block_8(16); \
	DES_bs_clear_block_8(24); \
	DES_bs_clear_block_8(32); \
	DES_bs_clear_block_8(40); \
	DES_bs_clear_block_8(48); \
	DES_bs_clear_block_8(56)

/*
 * Key access relative to k_ptr (%edx):
 *   K(i)  i-th 64-bit vector at k_ptr
 *   k(i)  i-th 32-bit pointer slot at k_ptr
 */
#define k_ptr				%edx
#define K(i)				nvec(i)(k_ptr)
#define k(i)				nptr(i)(k_ptr)

/* The six S-box input registers, in the order the S-box macros expect them */
#define a1				%mm0
#define a2				%mm1
#define a3				%mm2
#define a4				%mm3
#define a5				%mm4
#define a6				%mm5

/*
 * Integer scratch registers that hold pointers fetched from E() or k().
 * Routines that use tmp2 (%esi) push it on entry and pop it before returning.
 */
#define tmp1				%ecx
#define tmp2				%esi

/*
 * xor_E(i): prepare the six S-box inputs through the E pointer table.
 *
 * For j = 0..5: a(j+1) = K(i + j) ^ *E(i + j), i.e. the key vector at k_ptr
 * XORed with the 64-bit vector that pointer slot E(i + j) refers to.
 *
 * Clobbers tmp1 and tmp2 (used alternately to hold the fetched pointers) and
 * a1..a6.
 */
#define xor_E(i) \
	movl E(i),tmp1; \
	movq K(i),a1; \
	movl E(i + 1),tmp2; \
	movq K(i + 1),a2; \
	pxor (tmp1),a1; \
	pxor (tmp2),a2; \
	movl E(i + 2),tmp1; \
	movq K(i + 2),a3; \
	movl E(i + 3),tmp2; \
	movq K(i + 3),a4; \
	pxor (tmp1),a3; \
	pxor (tmp2),a4; \
	movl E(i + 4),tmp1; \
	movq K(i + 4),a5; \
	movl E(i + 5),tmp2; \
	movq K(i + 5),a6; \
	pxor (tmp1),a5; \
	pxor (tmp2),a6

/*
 * xor_B(b1, k1, ..., b6, k6): prepare the six S-box inputs from fixed block
 * positions, without going through the E pointer table.
 *
 * For j = 1..6: aj = B(bj) ^ K(kj), with K() relative to k_ptr.
 *
 * Clobbers a1..a6 only; no integer registers are used.
 */
#define xor_B(b1, k1, b2, k2, b3, k3, b4, k4, b5, k5, b6, k6) \
	movq B(b1),a1; \
	movq B(b2),a2; \
	pxor K(k1),a1; \
	movq B(b3),a3; \
	pxor K(k2),a2; \
	movq B(b4),a4; \
	pxor K(k3),a3; \
	movq B(b5),a5; \
	pxor K(k4),a4; \
	movq B(b6),a6; \
	pxor K(k5),a5; \
	pxor K(k6),a6

/*
 * xor_B_KS_p family: prepare the six S-box inputs from fixed block positions
 * XORed with key vectors that are reached through pointers, k(kj) being a
 * pointer slot relative to k_ptr: aj = B(bj) ^ *k(kj).
 *
 * All of these clobber tmp1 and tmp2.
 *
 * xor_B_KS_p_prefix sets up a1..a4 and leaves the pointer k(k6) in tmp1.
 */
#define xor_B_KS_p_prefix(b1, k1, b2, k2, b3, k3, b4, k4, k6) \
	movl k(k1),tmp1; \
	movl k(k2),tmp2; \
	movq B(b1),a1; \
	movq B(b2),a2; \
	pxor (tmp1),a1; \
	movl k(k3),tmp1; \
	pxor (tmp2),a2; \
	movl k(k4),tmp2; \
	movq B(b3),a3; \
	movq B(b4),a4; \
	pxor (tmp1),a3; \
	movl k(k6),tmp1; \
	pxor (tmp2),a4

/*
 * xor_B_KS_p_suffix sets up a5 and XORs the key vector at tmp1 (left there
 * by the prefix) into a6.  It does not load a6: a6 must already hold the
 * sixth block vector when this runs.
 */
#define xor_B_KS_p_suffix(b5, k5) \
	movl k(k5),tmp2; \
	movq B(b5),a5; \
	pxor (tmp1),a6; \
	pxor (tmp2),a5

/* General form: loads a6 from B(b6) between the prefix and the suffix. */
#define xor_B_KS_p(b1, k1, b2, k2, b3, k3, b4, k4, b5, k5, b6, k6) \
	xor_B_KS_p_prefix(b1, k1, b2, k2, b3, k3, b4, k4, k6); \
	movq B(b6),a6; \
	xor_B_KS_p_suffix(b5, k5)

/*
 * Special form without a b6 parameter: a6 (%mm5) is used as found.  In
 * DES_bs_crypt_LM this follows either the initial "pxor zero,zero" (zero is
 * also %mm5, and B(4) is stored as zero) or an S8 expansion, whose last
 * instruction stores %mm5 to its out1 operand.
 */
#define xor_B_KS_p_special(b1, k1, b2, k2, b3, k3, b4, k4, b5, k5, k6) \
	xor_B_KS_p_prefix(b1, k1, b2, k2, b3, k3, b4, k4, k6); \
	xor_B_KS_p_suffix(b5, k5)

/*
 * Constant with 0x01 in every byte, stored by DES_bs_init_asm; the key
 * finalization code uses it as the starting bit mask.
 */
#define mask01				tmp_at(15)

/* Source pointer for key finalization; V(i) is the i-th vector at v_ptr */
#define v_ptr				%eax
#define V(i)				nvec(i)(v_ptr)

/*
 * SHLB1(reg): shift left by one bit.  The enabled variant adds the register
 * to itself byte-wise (paddb), so nothing carries from one byte into the
 * next; the disabled alternative is a plain 64-bit shift.
 */
#if 1
#define SHLB1(reg)			paddb reg,reg
#else
#define SHLB1(reg)			psllq $1,reg
#endif

/*
 * FINALIZE_NEXT_KEY_BITS_0_6: regroup bits of the eight vectors V(0)..V(7)
 * (at v_ptr) into the seven vectors K(0)..K(6) (at k_ptr).
 *
 * The macro consists of seven sections separated by blank continuation
 * lines, one per bit position b = 0..6.  In section b, %mm7 holds a mask with
 * only bit b set in every byte.  Each V(j) is ANDed with that mask and then
 * shifted so that the selected bit lands on bit position j of its byte (left
 * by j - b, or right by b - j); the eight results are ORed together and
 * stored to K(b).  In other words, within each byte lane, bit j of K(b) is
 * bit b of V(j).
 *
 * %mm7 starts as mask01 and is advanced with SHLB1 at the end of sections
 * 0..5, so on exit it holds the mask for bit 6.  DES_bs_crypt_LM relies on
 * this to continue with bit 7.  V(1) for the next section is preloaded into
 * %mm1 near the end of sections 0..5.
 *
 * Neither v_ptr nor k_ptr is modified.  Clobbers %mm0..%mm7.
 */
#define FINALIZE_NEXT_KEY_BITS_0_6 \
	movq mask01,%mm7; \
\
	movq V(0),%mm0; \
	movq V(1),%mm1; \
	movq V(2),%mm2; \
	movq V(3),%mm3; \
	pand %mm7,%mm0; \
	pand %mm7,%mm1; \
	pand %mm7,%mm2; \
	pand %mm7,%mm3; \
	SHLB1(%mm1); \
	psllq $2,%mm2; \
	psllq $3,%mm3; \
	por %mm0,%mm1; \
	por %mm2,%mm3; \
	movq V(4),%mm4; \
	movq V(5),%mm5; \
	por %mm1,%mm3; \
	pand %mm7,%mm4; \
	pand %mm7,%mm5; \
	movq V(6),%mm6; \
	movq V(7),%mm0; \
	psllq $4,%mm4; \
	pand %mm7,%mm6; \
	pand %mm7,%mm0; \
	psllq $5,%mm5; \
	psllq $6,%mm6; \
	psllq $7,%mm0; \
	por %mm4,%mm5; \
	por %mm6,%mm3; \
	por %mm5,%mm0; \
	movq V(1),%mm1; \
	por %mm3,%mm0; \
	SHLB1(%mm7); \
	movq %mm0,K(0); \
\
	movq V(0),%mm0; \
	movq V(2),%mm2; \
	movq V(3),%mm3; \
	pand %mm7,%mm0; \
	pand %mm7,%mm1; \
	pand %mm7,%mm2; \
	pand %mm7,%mm3; \
	psrlq $1,%mm0; \
	SHLB1(%mm2); \
	psllq $2,%mm3; \
	por %mm0,%mm1; \
	por %mm2,%mm3; \
	movq V(4),%mm4; \
	movq V(5),%mm5; \
	por %mm1,%mm3; \
	pand %mm7,%mm4; \
	pand %mm7,%mm5; \
	movq V(6),%mm6; \
	movq V(7),%mm0; \
	psllq $3,%mm4; \
	pand %mm7,%mm6; \
	pand %mm7,%mm0; \
	psllq $4,%mm5; \
	psllq $5,%mm6; \
	psllq $6,%mm0; \
	por %mm4,%mm5; \
	por %mm6,%mm3; \
	por %mm5,%mm0; \
	movq V(1),%mm1; \
	por %mm3,%mm0; \
	SHLB1(%mm7); \
	movq %mm0,K(1); \
\
	movq V(0),%mm0; \
	movq V(2),%mm2; \
	movq V(3),%mm3; \
	pand %mm7,%mm0; \
	pand %mm7,%mm1; \
	pand %mm7,%mm2; \
	pand %mm7,%mm3; \
	psrlq $2,%mm0; \
	psrlq $1,%mm1; \
	SHLB1(%mm3); \
	por %mm0,%mm1; \
	por %mm2,%mm3; \
	movq V(4),%mm4; \
	movq V(5),%mm5; \
	por %mm1,%mm3; \
	pand %mm7,%mm4; \
	pand %mm7,%mm5; \
	movq V(6),%mm6; \
	movq V(7),%mm0; \
	psllq $2,%mm4; \
	pand %mm7,%mm6; \
	pand %mm7,%mm0; \
	psllq $3,%mm5; \
	psllq $4,%mm6; \
	psllq $5,%mm0; \
	por %mm4,%mm5; \
	por %mm6,%mm3; \
	por %mm5,%mm0; \
	movq V(1),%mm1; \
	por %mm3,%mm0; \
	SHLB1(%mm7); \
	movq %mm0,K(2); \
\
	movq V(0),%mm0; \
	movq V(2),%mm2; \
	movq V(3),%mm3; \
	pand %mm7,%mm0; \
	pand %mm7,%mm1; \
	pand %mm7,%mm2; \
	pand %mm7,%mm3; \
	psrlq $3,%mm0; \
	psrlq $2,%mm1; \
	psrlq $1,%mm2; \
	por %mm0,%mm1; \
	por %mm2,%mm3; \
	movq V(4),%mm4; \
	movq V(5),%mm5; \
	por %mm1,%mm3; \
	pand %mm7,%mm4; \
	pand %mm7,%mm5; \
	movq V(6),%mm6; \
	movq V(7),%mm0; \
	SHLB1(%mm4); \
	pand %mm7,%mm6; \
	pand %mm7,%mm0; \
	psllq $2,%mm5; \
	psllq $3,%mm6; \
	psllq $4,%mm0; \
	por %mm4,%mm5; \
	por %mm6,%mm3; \
	por %mm5,%mm0; \
	movq V(1),%mm1; \
	por %mm3,%mm0; \
	SHLB1(%mm7); \
	movq %mm0,K(3); \
\
	movq V(0),%mm0; \
	movq V(2),%mm2; \
	movq V(3),%mm3; \
	pand %mm7,%mm0; \
	pand %mm7,%mm1; \
	pand %mm7,%mm2; \
	pand %mm7,%mm3; \
	psrlq $4,%mm0; \
	psrlq $3,%mm1; \
	psrlq $2,%mm2; \
	psrlq $1,%mm3; \
	por %mm0,%mm1; \
	por %mm2,%mm3; \
	movq V(4),%mm4; \
	movq V(5),%mm5; \
	por %mm1,%mm3; \
	pand %mm7,%mm4; \
	pand %mm7,%mm5; \
	movq V(6),%mm6; \
	movq V(7),%mm0; \
	pand %mm7,%mm6; \
	pand %mm7,%mm0; \
	SHLB1(%mm5); \
	psllq $2,%mm6; \
	psllq $3,%mm0; \
	por %mm4,%mm5; \
	por %mm6,%mm3; \
	por %mm5,%mm0; \
	movq V(1),%mm1; \
	por %mm3,%mm0; \
	SHLB1(%mm7); \
	movq %mm0,K(4); \
\
	movq V(0),%mm0; \
	movq V(2),%mm2; \
	movq V(3),%mm3; \
	pand %mm7,%mm0; \
	pand %mm7,%mm1; \
	pand %mm7,%mm2; \
	pand %mm7,%mm3; \
	psrlq $5,%mm0; \
	psrlq $4,%mm1; \
	psrlq $3,%mm2; \
	psrlq $2,%mm3; \
	por %mm0,%mm1; \
	por %mm2,%mm3; \
	movq V(4),%mm4; \
	movq V(5),%mm5; \
	por %mm1,%mm3; \
	pand %mm7,%mm4; \
	pand %mm7,%mm5; \
	movq V(6),%mm6; \
	movq V(7),%mm0; \
	psrlq $1,%mm4; \
	pand %mm7,%mm6; \
	pand %mm7,%mm0; \
	SHLB1(%mm6); \
	psllq $2,%mm0; \
	por %mm4,%mm5; \
	por %mm6,%mm3; \
	por %mm5,%mm0; \
	movq V(1),%mm1; \
	por %mm3,%mm0; \
	SHLB1(%mm7); \
	movq %mm0,K(5); \
\
	movq V(0),%mm0; \
	movq V(2),%mm2; \
	movq V(3),%mm3; \
	pand %mm7,%mm0; \
	pand %mm7,%mm1; \
	pand %mm7,%mm2; \
	pand %mm7,%mm3; \
	psrlq $6,%mm0; \
	psrlq $5,%mm1; \
	psrlq $4,%mm2; \
	psrlq $3,%mm3; \
	por %mm0,%mm1; \
	por %mm2,%mm3; \
	movq V(4),%mm4; \
	movq V(5),%mm5; \
	por %mm1,%mm3; \
	pand %mm7,%mm4; \
	pand %mm7,%mm5; \
	movq V(6),%mm6; \
	movq V(7),%mm0; \
	psrlq $2,%mm4; \
	pand %mm7,%mm6; \
	pand %mm7,%mm0; \
	psrlq $1,%mm5; \
	SHLB1(%mm0); \
	por %mm4,%mm5; \
	por %mm6,%mm3; \
	por %mm5,%mm0; \
	por %mm3,%mm0; \
	movq %mm0,K(6)

.text

/*
 * DES_bs_init_asm: store the two constants used by the rest of this file.
 *
 *   pnot   (tmp_at(0))  = all bits set
 *   mask01 (tmp_at(15)) = 0x01 in every byte
 *
 * Takes no arguments and produces no return value.  Clobbers %mm0.
 *
 * Like every other exported routine in this file, it leaves MMX state with
 * emms before returning when EMMS is defined, so that x87 code in the caller
 * does not find the FPU tag word marked as in use.
 */
DO_ALIGN(5)
.globl DES_bs_init_asm
DES_bs_init_asm:
	/* %mm0 = all ones (every dword compares equal to itself) */
	pcmpeqd %mm0,%mm0
	movq %mm0,pnot
	/* 0xff + 0xff = 0xfe in each byte; XOR with all ones gives 0x01 */
	paddb %mm0,%mm0
	pxor pnot,%mm0
	movq %mm0,mask01
#ifdef EMMS
	emms
#endif
	ret

/*
 * Loop state shared by DES_bs_crypt and DES_bs_crypt_25:
 *   rounds_and_swapped  down-counter of passes through the loop body; it is
 *                       started at 8, or at 0x108 when a repetition is
 *                       entered at the _swap label, in which case reaching
 *                       0x100 marks the end of that repetition
 *   iterations          number of repetitions still to run
 */
#define rounds_and_swapped		%ebp
#define iterations			%eax

/*
 * DES_bs_crypt: run the S-box network over the block B() for the number of
 * repetitions given as the first stack argument (read from 4(%esp)).
 *
 * If DES_bs_all_keys_changed is non-zero, DES_bs_finalize_keys is called
 * first.  B(0)..B(63) are then cleared and the loop starts.
 *
 * One pass through the loop body has two halves: the code at _start updates
 * B(32)..B(63) using K(0)..K(47), the code at _swap updates B(0)..B(31)
 * using K(48)..K(95); k_ptr then advances by 96 vectors.  A repetition is
 * 8 such passes.  Instead of exchanging the two halves of B between
 * repetitions, every second repetition is entered at _swap with k_ptr moved
 * back by an extra 48 vectors, and ends after the _start half via _next.
 *
 * NOTE(review): the code assumes the repetition count is at least 1; a count
 * of 0 would wrap around in "decl iterations" -- confirm callers never pass 0.
 *
 * Saves and restores %ebp and %esi; clobbers %eax, %ecx, %edx, %mm0..%mm7.
 */
DO_ALIGN(5)
.globl DES_bs_crypt
DES_bs_crypt:
	cmpl $0,DES_bs_all_keys_changed
	jz DES_bs_crypt_body
	call DES_bs_finalize_keys
DES_bs_crypt_body:
	/* Loaded only now: DES_bs_finalize_keys uses %eax as v_ptr */
	movl 4(%esp),iterations
	pxor zero,zero
	pushl %ebp
	pushl %esi
	movl $DES_bs_all_KS_v,k_ptr
	DES_bs_clear_block
	movl $8,rounds_and_swapped
DES_bs_crypt_start:
	xor_E(0)
	S1(B(40), B(48), B(54), B(62))
	xor_E(6)
	S2(B(44), B(59), B(33), B(49))
	xor_E(12)
	S3(B(55), B(47), B(61), B(37))
	xor_E(18)
	S4(B(57), B(51), B(41), B(32))
	xor_E(24)
	S5(B(39), B(45), B(56), B(34))
	xor_E(30)
	S6(B(35), B(60), B(42), B(50))
	xor_E(36)
	S7(B(63), B(43), B(53), B(38))
	xor_E(42)
	S8(B(36), B(58), B(46), B(52))
	/* 0x100 is only reached in a repetition that was entered at _swap */
	cmpl $0x100,rounds_and_swapped
	je DES_bs_crypt_next
DES_bs_crypt_swap:
	xor_E(48)
	S1(B(8), B(16), B(22), B(30))
	xor_E(54)
	S2(B(12), B(27), B(1), B(17))
	xor_E(60)
	S3(B(23), B(15), B(29), B(5))
	xor_E(66)
	S4(B(25), B(19), B(9), B(0))
	xor_E(72)
	S5(B(7), B(13), B(24), B(2))
	xor_E(78)
	S6(B(3), B(28), B(10), B(18))
	xor_E(84)
	S7(B(31), B(11), B(21), B(6))
	xor_E(90)
	/* S8 does not use k_ptr, so it can be advanced before the S-box runs */
	addl $nvec(96),k_ptr
	S8(B(4), B(26), B(14), B(20))
	decl rounds_and_swapped
	jnz DES_bs_crypt_start
	/*
	 * End of a repetition that began at _start.  Rewind k_ptr to 48
	 * vectors before DES_bs_all_KS_v so that K(48).. at _swap addresses
	 * the first key vectors again.
	 */
	subl $nvec(0x300+48),k_ptr
	movl $0x108,rounds_and_swapped
	decl iterations
	jnz DES_bs_crypt_swap
	popl %esi
	popl %ebp
#ifdef EMMS
	emms
#endif
	ret
DES_bs_crypt_next:
	/* End of a repetition that began at _swap: k_ptr back to KS_v */
	subl $nvec(0x300-48),k_ptr
	movl $8,rounds_and_swapped
	decl iterations
	jnz DES_bs_crypt_start
	popl %esi
	popl %ebp
#ifdef EMMS
	emms
#endif
	ret

/*
 * DES_bs_crypt_25: same loop structure as DES_bs_crypt, with the repetition
 * count fixed at 25 and no stack argument read.
 *
 * Differences from DES_bs_crypt visible in the code:
 *  - The inputs of S3, S4, S7 and S8 come from fixed block positions (xor_B)
 *    rather than through the E pointer table; S1, S2, S5 and S6 still use
 *    xor_E.
 *  - When DES_bs_all_keys_changed is non-zero, control jumps to
 *    DES_bs_finalize_keys_25, which returns into DES_bs_crypt_25_body.
 *  - k_ptr is advanced after the second S8 rather than before it.
 *
 * Repetitions alternate between ending on the fall-through path after _swap
 * and ending at _next.  Because 25 is odd, the last repetition always ends
 * on the fall-through path, which is why _next jumps back unconditionally.
 *
 * Saves and restores %ebp and %esi; clobbers %eax, %ecx, %edx, %mm0..%mm7.
 */
DO_ALIGN(5)
.globl DES_bs_crypt_25
DES_bs_crypt_25:
	cmpl $0,DES_bs_all_keys_changed
	jnz DES_bs_finalize_keys_25
DES_bs_crypt_25_body:
	pxor zero,zero
	pushl %ebp
	pushl %esi
	movl $DES_bs_all_KS_v,k_ptr
	DES_bs_clear_block
	movl $8,rounds_and_swapped
	movl $25,iterations
DES_bs_crypt_25_start:
	xor_E(0)
	S1(B(40), B(48), B(54), B(62))
	xor_E(6)
	S2(B(44), B(59), B(33), B(49))
	xor_B(7, 12, 8, 13, 9, 14, 10, 15, 11, 16, 12, 17)
	S3(B(55), B(47), B(61), B(37))
	xor_B(11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 23)
	S4(B(57), B(51), B(41), B(32))
	xor_E(24)
	S5(B(39), B(45), B(56), B(34))
	xor_E(30)
	S6(B(35), B(60), B(42), B(50))
	xor_B(23, 36, 24, 37, 25, 38, 26, 39, 27, 40, 28, 41)
	S7(B(63), B(43), B(53), B(38))
	xor_B(27, 42, 28, 43, 29, 44, 30, 45, 31, 46, 0, 47)
	S8(B(36), B(58), B(46), B(52))
	/* 0x100 is only reached in a repetition that was entered at _swap */
	cmpl $0x100,rounds_and_swapped
	je DES_bs_crypt_25_next
DES_bs_crypt_25_swap:
	xor_E(48)
	S1(B(8), B(16), B(22), B(30))
	xor_E(54)
	S2(B(12), B(27), B(1), B(17))
	xor_B(39, 60, 40, 61, 41, 62, 42, 63, 43, 64, 44, 65)
	S3(B(23), B(15), B(29), B(5))
	xor_B(43, 66, 44, 67, 45, 68, 46, 69, 47, 70, 48, 71)
	S4(B(25), B(19), B(9), B(0))
	xor_E(72)
	S5(B(7), B(13), B(24), B(2))
	xor_E(78)
	S6(B(3), B(28), B(10), B(18))
	xor_B(55, 84, 56, 85, 57, 86, 58, 87, 59, 88, 60, 89)
	S7(B(31), B(11), B(21), B(6))
	xor_B(59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 32, 95)
	S8(B(4), B(26), B(14), B(20))
	addl $nvec(96),k_ptr
	decl rounds_and_swapped
	jnz DES_bs_crypt_25_start
	/* Rewind to 48 vectors before KS_v and enter the next repetition at _swap */
	subl $nvec(0x300+48),k_ptr
	movl $0x108,rounds_and_swapped
	decl iterations
	jnz DES_bs_crypt_25_swap
	popl %esi
	popl %ebp
#ifdef EMMS
	emms
#endif
	ret
DES_bs_crypt_25_next:
	subl $nvec(0x300-48),k_ptr
	movl $8,rounds_and_swapped
	decl iterations
	/* Never the last repetition here (see the header comment) */
	jmp DES_bs_crypt_25_start

/*
 * DES_bs_finalize_keys: internal helper, not exported.
 *
 * Step 1 (main loop, 8 passes): FINALIZE_NEXT_KEY_BITS_0_6 turns each group
 * of 8 vectors from DES_bs_all_xkeys into 7 vectors of DES_bs_all_K, until
 * all 56 vectors of K are written.
 *
 * Step 2 (expand loop): for each of the 0x300 pointer slots in
 * DES_bs_all_KSp, the 64-bit vector it points to is copied to the
 * corresponding position of DES_bs_all_KS_v, 8 slots per pass.
 *
 * DES_bs_all_keys_changed is reset to 0 on entry.
 *
 * Two entry points:
 *   DES_bs_finalize_keys     reached with "call"; returns to its caller
 *   DES_bs_finalize_keys_25  reached with "jnz"; pushes the address of
 *                            DES_bs_crypt_25_body so that the final "ret"
 *                            continues there
 *
 * Saves and restores %esi (tmp2); clobbers %eax (v_ptr), %ecx (tmp1),
 * %edx (k_ptr) and %mm0..%mm7.  No emms is executed here; both callers go on
 * using MMX and execute it themselves before returning.
 */
DES_bs_finalize_keys_25:
	pushl $DES_bs_crypt_25_body
DES_bs_finalize_keys:
	movl $DES_bs_all_xkeys,v_ptr
	movl $DES_bs_all_K,k_ptr
	movl $0,DES_bs_all_keys_changed
DES_bs_finalize_keys_main_loop:
	FINALIZE_NEXT_KEY_BITS_0_6
	/* 7 vectors produced from 8 vectors consumed */
	addl $nvec(7),k_ptr
	addl $nvec(8),v_ptr
	cmpl $DES_bs_all_K+nvec(56),k_ptr
	jb DES_bs_finalize_keys_main_loop
	/* tmp2 is %esi, which is needed from here on */
	pushl %esi
	movl $DES_bs_all_KSp,k_ptr
	movl $DES_bs_all_KS_v,v_ptr
DES_bs_finalize_keys_expand_loop:
	/* Pointer loads for the next pair are interleaved with the stores */
	movl k(0),tmp1
	movl k(1),tmp2
	movq (tmp1),%mm0
	movq (tmp2),%mm1
	movl k(2),tmp1
	movl k(3),tmp2
	movq %mm0,V(0)
	movq %mm1,V(1)
	movq (tmp1),%mm0
	movq (tmp2),%mm1
	movl k(4),tmp1
	movl k(5),tmp2
	movq %mm0,V(2)
	movq %mm1,V(3)
	movq (tmp1),%mm0
	movq (tmp2),%mm1
	movl k(6),tmp1
	movl k(7),tmp2
	movq %mm0,V(4)
	movq %mm1,V(5)
	movq (tmp1),%mm0
	movq (tmp2),%mm1
	addl $nptr(8),k_ptr
	movq %mm0,V(6)
	movq %mm1,V(7)
	addl $nvec(8),v_ptr
	cmpl $DES_bs_all_KSp+nptr(0x300),k_ptr
	jb DES_bs_finalize_keys_expand_loop
	popl %esi
	ret

/* Register filled with all ones while the initial block is stored */
#define ones				%mm1

/* Loop counter of DES_bs_crypt_LM; %eax is free again once v_ptr is done */
#define rounds				%eax

/*
 * DES_bs_crypt_LM: key finalization plus one run of the S-box network with
 * keys taken through the DES_bs_all_KS_p pointer table.
 *
 * Step 1: unlike DES_bs_finalize_keys, all eight bit positions are
 * regrouped: FINALIZE_NEXT_KEY_BITS_0_6 writes K(0)..K(6) and the code
 * labelled "bit 7" below writes K(7), so each pass consumes 8 vectors of
 * DES_bs_all_xkeys and produces 8 vectors of DES_bs_all_K (7 passes, 56
 * vectors).  DES_bs_all_keys_changed is neither tested nor cleared.
 *
 * Step 2: B(0)..B(63) are set to a fixed pattern of all-zero and all-one
 * vectors (presumably the bitsliced form of the constant LM plaintext --
 * confirm against the C implementation).
 *
 * Step 3: 8 passes, each running S1..S8 twice (first into B(32)..B(63), then
 * into B(0)..B(31)) with k_ptr advancing by 96 pointer slots per pass.
 *
 * Return value: the 32-bit value pointed to by the first stack argument.
 * The pointer is fetched from 8(%esp) while %esi is still pushed.
 *
 * Saves and restores %esi; clobbers %eax, %ecx, %edx, %mm0..%mm7.
 */
DO_ALIGN(5)
.globl DES_bs_crypt_LM
DES_bs_crypt_LM:
	movl $DES_bs_all_xkeys,v_ptr
	movl $DES_bs_all_K,k_ptr
DES_bs_finalize_keys_LM_loop:
	FINALIZE_NEXT_KEY_BITS_0_6
# bit 7
	/* %mm7 is the bit 6 mask left by the macro; advance it to bit 7 */
	SHLB1(%mm7)
	movq V(0),%mm0
	movq V(1),%mm1
	movq V(2),%mm2
	movq V(3),%mm3
	pand %mm7,%mm0
	pand %mm7,%mm1
	pand %mm7,%mm2
	pand %mm7,%mm3
	psrlq $7,%mm0
	psrlq $6,%mm1
	psrlq $5,%mm2
	psrlq $4,%mm3
	por %mm0,%mm1
	por %mm2,%mm3
	movq V(4),%mm4
	movq V(5),%mm5
	por %mm1,%mm3
	pand %mm7,%mm4
	pand %mm7,%mm5
	movq V(6),%mm6
	movq V(7),%mm0
	psrlq $3,%mm4
	pand %mm7,%mm6
	pand %mm7,%mm0
	psrlq $2,%mm5
	psrlq $1,%mm6
	por %mm4,%mm5
	por %mm6,%mm3
	por %mm5,%mm0
	addl $nvec(8),v_ptr
	por %mm3,%mm0
	movq %mm0,K(7)
	addl $nvec(8),k_ptr
	cmpl $DES_bs_all_K+nvec(56),k_ptr
	jb DES_bs_finalize_keys_LM_loop

	/*
	 * zero is %mm5 (a6): the first xor_B_KS_p_special below relies on it
	 * still being zero, matching the value stored to B(4).
	 */
	pxor zero,zero
	pushl %esi
	pcmpeqd ones,ones
	movl $DES_bs_all_KS_p,k_ptr
	movq zero,B(0)
	movq zero,B(1)
	movq zero,B(2)
	movq zero,B(3)
	movq zero,B(4)
	movq zero,B(5)
	movq zero,B(6)
	movq zero,B(7)
	movq ones,B(8)
	movq ones,B(9)
	movq ones,B(10)
	movq zero,B(11)
	movq ones,B(12)
	movq zero,B(13)
	movq zero,B(14)
	movq zero,B(15)
	movq zero,B(16)
	movq zero,B(17)
	movq zero,B(18)
	movq zero,B(19)
	movq zero,B(20)
	movq zero,B(21)
	movq zero,B(22)
	movq ones,B(23)
	movq zero,B(24)
	movq zero,B(25)
	movq ones,B(26)
	movq zero,B(27)
	movq zero,B(28)
	movq ones,B(29)
	movq ones,B(30)
	movq ones,B(31)
	movq zero,B(32)
	movq zero,B(33)
	movq zero,B(34)
	movq ones,B(35)
	movq zero,B(36)
	movq ones,B(37)
	movq ones,B(38)
	movq ones,B(39)
	movq zero,B(40)
	movq zero,B(41)
	movq zero,B(42)
	movq zero,B(43)
	movq zero,B(44)
	movq ones,B(45)
	movq zero,B(46)
	movq zero,B(47)
	movq ones,B(48)
	movq ones,B(49)
	movq zero,B(50)
	movq zero,B(51)
	movq zero,B(52)
	movq zero,B(53)
	movq ones,B(54)
	movq zero,B(55)
	movq ones,B(56)
	movq zero,B(57)
	movq ones,B(58)
	movq zero,B(59)
	movq ones,B(60)
	movq ones,B(61)
	movq ones,B(62)
	movq ones,B(63)
	movl $8,rounds
DES_bs_crypt_LM_loop:
	/* Sixth input is B(4), already in %mm5 (zero above, or S8's out1) */
	xor_B_KS_p_special(31, 0, 0, 1, 1, 2, 2, 3, 3, 4, 5)
	S1(B(40), B(48), B(54), B(62))
	xor_B_KS_p(3, 6, 4, 7, 5, 8, 6, 9, 7, 10, 8, 11)
	S2(B(44), B(59), B(33), B(49))
	xor_B_KS_p(7, 12, 8, 13, 9, 14, 10, 15, 11, 16, 12, 17)
	S3(B(55), B(47), B(61), B(37))
	xor_B_KS_p(11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 23)
	S4(B(57), B(51), B(41), B(32))
	xor_B_KS_p(15, 24, 16, 25, 17, 26, 18, 27, 19, 28, 20, 29)
	S5(B(39), B(45), B(56), B(34))
	xor_B_KS_p(19, 30, 20, 31, 21, 32, 22, 33, 23, 34, 24, 35)
	S6(B(35), B(60), B(42), B(50))
	xor_B_KS_p(23, 36, 24, 37, 25, 38, 26, 39, 27, 40, 28, 41)
	S7(B(63), B(43), B(53), B(38))
	xor_B_KS_p(27, 42, 28, 43, 29, 44, 30, 45, 31, 46, 0, 47)
	S8(B(36), B(58), B(46), B(52))
	/* Sixth input is B(36), left in %mm5 by the S8 just above */
	xor_B_KS_p_special(63, 48, 32, 49, 33, 50, 34, 51, 35, 52, 53)
	S1(B(8), B(16), B(22), B(30))
	xor_B_KS_p(35, 54, 36, 55, 37, 56, 38, 57, 39, 58, 40, 59)
	S2(B(12), B(27), B(1), B(17))
	xor_B_KS_p(39, 60, 40, 61, 41, 62, 42, 63, 43, 64, 44, 65)
	S3(B(23), B(15), B(29), B(5))
	xor_B_KS_p(43, 66, 44, 67, 45, 68, 46, 69, 47, 70, 48, 71)
	S4(B(25), B(19), B(9), B(0))
	xor_B_KS_p(47, 72, 48, 73, 49, 74, 50, 75, 51, 76, 52, 77)
	S5(B(7), B(13), B(24), B(2))
	xor_B_KS_p(51, 78, 52, 79, 53, 80, 54, 81, 55, 82, 56, 83)
	S6(B(3), B(28), B(10), B(18))
	xor_B_KS_p(55, 84, 56, 85, 57, 86, 58, 87, 59, 88, 60, 89)
	S7(B(31), B(11), B(21), B(6))
	xor_B_KS_p(59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 32, 95)
	/* S8 does not use k_ptr, so it can be advanced before the S-box runs */
	addl $nptr(96),k_ptr
	S8(B(4), B(26), B(14), B(20))
	decl rounds
	jnz DES_bs_crypt_LM_loop
	/* %esi is still on the stack, so the first argument is at 8(%esp) */
	movl 8(%esp),%eax
	popl %esi
#ifdef EMMS
	emms
#endif
	movl (%eax),%eax
	ret

#endif

/*
 * On Linux/ELF, emit an empty .note.GNU-stack section.  GNU toolchains read
 * its presence (without the "x" flag) as a statement that this object does
 * not need an executable stack.
 */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",@progbits
#endif
